library(data.table)
library(dplyr)
library(DT)
library(ggplot2)
library(gtools)
library(vegan)
library(iNEXT)
library(fossil)
library(ggrepel)
Let’s read the dataset and remove the samples containing fewer than 49975 reads:
## [1] 46808 132
## OTUId st055_MD1013 st056_MD1035 st057_MD1042 st058_MD1080
## 1 OTU_2 9289 2293 16285 5140
## 2 OTU_106 738 25 1497 3309
## 3 OTU_403 81 3 238 6
## 4 OTU_139 106 23 211 58
## 5 OTU_4 230 78 776 150
## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated
## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated
## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated
## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated
## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated
## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated
## Warning in `[<-.factor`(`*tmp*`, thisvar, value = "NA"): invalid factor
## level, NA generated
## [1] 46808 131
## st055_MD1013 st056_MD1035 st057_MD1042 st058_MD1080 st059_MD1100
## OTU_2 9289 2293 16285 5140 1187
## OTU_106 738 25 1497 3309 1808
## OTU_403 81 3 238 6 37
## OTU_139 106 23 211 58 82
## OTU_4 230 78 776 150 561
## [1] 46690 129
## [1] 46690 7
## SILVA_classif
## OTU_2 FJ832119.1.1585_Eukaryota;SAR;Alveolata;Protalveolata;Syndiniales;Syndiniales_Group_I;uncultured_marine_picoplankton
## OTU_106 AY665056.1.1740_Eukaryota;SAR;Alveolata;Protalveolata;Syndiniales;Syndiniales_Group_I;uncultured_eukaryote
## OTU_403 KC488491.1.1699_Eukaryota;SAR;Alveolata;Protalveolata;Syndiniales;Syndiniales_Group_III;uncultured_marine_alveolate
## OTU_139 EF172945.1.1672_Eukaryota;SAR;Alveolata;Protalveolata;Syndiniales;Syndiniales_Group_II;uncultured_eukaryote
## OTU_4 KC488405.1.1558_Eukaryota;SAR;Alveolata;Dinoflagellata;Dinophyceae;Gymnodiniphycidae;Gymnodinium_clade;Erythropsidinium;uncultured_dinoflagellate
## SILVA_consensus MAS_classif MAS_consensus
## OTU_2 <NA> EU793773_MALV-I_Alveolata <NA>
## OTU_106 <NA> EU818437_MALV-I_Alveolata <NA>
## OTU_403 <NA> EU793615_MALV-III_Alveolata <NA>
## OTU_139 <NA> EU818480_MALV-II_Alveolata <NA>
## OTU_4 Dinophyceae EU780636_Dinophyceae_Alveolata Dinophyceae
## BM_classif BM_consensus
## OTU_2 3261_MALV-I_Alveolata_385 <NA>
## OTU_106 90_MALV-I_Alveolata_1447 <NA>
## OTU_403 38_MALV-III_Alveolata_6570 <NA>
## OTU_139 <NA> <NA>
## OTU_4 21474_Dinophyceae_Alveolata_6 Dinophyceae
## SILVA_plus_MAS_plus_BM_classif
## OTU_2 <NA>
## OTU_106 <NA>
## OTU_403 <NA>
## OTU_139 <NA>
## OTU_4 Dinophyceae
## [1] 46690 122
## st055_MD1013 st056_MD1035 st057_MD1042 st058_MD1080 st059_MD1100
## OTU_2 9289 2293 16285 5140 1187
## OTU_106 738 25 1497 3309 1808
## OTU_403 81 3 238 6 37
## OTU_139 106 23 211 58 82
## OTU_4 230 78 776 150 561
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4050 59560 98630 126100 157000 935800
## st056_MD1035 st063_MD1176 st064_MD1186 st067_MD1246 st083_MD1517
## 21169 44047 48691 10358 11740
## st013_MD202 st137_MD2695 st029_MD506 st030_MD528 st004_MD60
## 41299 35895 8514 21381 36838
## st035_MD621 st037_MD646 st046_MD834 st051_MD916 st053_MD962
## 45669 17136 40589 38674 20969
## st054_MD985 st007_MD98
## 4050 45207
## [1] 46690 105
## [1] 46690 92
## [1] 43966 92
Table dimensions and content outline:
## [1] 46690 92
## st055_MD1013 st057_MD1042 st058_MD1080 st059_MD1100 st009_MD111
## OTU_2 9289 16285 5140 1187 8207
## OTU_106 738 1497 3309 1808 30
## OTU_403 81 238 6 37 0
## OTU_139 106 211 58 82 214
## OTU_4 230 776 150 561 337
Minimum number of reads per station:
min(colSums(tb18_tax_occur_min49975))
## [1] 49975
Maximum number of reads per station:
max(colSums(tb18_tax_occur_min49975))
## [1] 935755
Identification of stations with higher number of reads:
# Total reads per station, then the stations holding more than 300000 reads.
amplicons_per_sample <- colSums(tb18_tax_occur_min49975)
amplicons_per_sample[which(amplicons_per_sample > 300000)]
## st057_MD1042 st112_MD2051 st131_MD2516 st039_MD684
## 308992 387935 935755 328173
Overall reads per sample:
Let’s normalize the original dataset by randomly subsampling 49975 reads in each station:
# Transpose the table; the rarefied result below has stations as rows and OTUs
# as columns.
tb18_tax_occur_min49975_t <- t(tb18_tax_occur_min49975)
# rrarefy() draws a random subsample, so fix the RNG seed first (the value is
# arbitrary). Without it, the number of OTUs lost to subsampling changes on
# every run and the figures quoted in the text stop matching the output.
set.seed(49975)
tb18_tax_occur_ss49975 <- rrarefy(tb18_tax_occur_min49975_t, sample = 49975)
The normalized table shows the following dimensions and format:
## [1] 92 46690
## OTU_2 OTU_106 OTU_403 OTU_139 OTU_4
## st055_MD1013 8976 706 76 104 222
## st057_MD1042 2644 244 34 34 132
## st058_MD1080 1295 855 3 16 28
## st059_MD1100 464 689 14 35 205
## st009_MD111 2131 5 0 64 93
Its content fits with the expected normalization values (49975 reads per station):
rowSums(tb18_tax_occur_ss49975)
## st055_MD1013 st057_MD1042 st058_MD1080 st059_MD1100 st009_MD111
## 49975 49975 49975 49975 49975
## st060_MD1125 st062_MD1143 st065_MD1209 st066_MD1222 st068b_MD1260
## 49975 49975 49975 49975 49975
## st069_MD1293 st070_MD1302 st072_MD1331 st073_MD1354 st074_MD1368
## 49975 49975 49975 49975 49975
## st075_MD1398 st010_MD141 st076_MD1421 st077_MD1425 st078_MD1459
## 49975 49975 49975 49975 49975
## st079_MD1475 st081_MD1479 st082_MD1490 st085_MD1537 st086_MD1559
## 49975 49975 49975 49975 49975
## st087_MD1589 st088_MD1607 st089_MD1629 st092_MD1672 st093_MD1700
## 49975 49975 49975 49975 49975
## st094_MD1724 st095_MD1744 st096_MD1772 st097_MD1798 st098_MD1811
## 49975 49975 49975 49975 49975
## st101_MD1857 st102_MD1885 st103_MD1887 st104_MD1928 st106_MD1956
## 49975 49975 49975 49975 49975
## st107_MD1964 st012_MD196 st108_MD2004 st109_MD2008 st110_MD2047
## 49975 49975 49975 49975 49975
## st112_MD2051 st114_MD2094 st115_MD2111 st014_MD214 st118_MD2155
## 49975 49975 49975 49975 49975
## st120_MD2243 st126_MD2380 st127_MD2408 st128_MD2436 st129_MD2448
## 49975 49975 49975 49975 49975
## st131_MD2516 st015_MD254 st132_MD2562 st133_MD2594 st134_MD2624
## 49975 49975 49975 49975 49975
## st017_MD266 st138_MD2723 st140_MD2761 st018_MD285 st143_MD2864
## 49975 49975 49975 49975 49975
## st001_MD28 st145_MD2956 st019_MD311 st022_MD346 st023_MD366
## 49975 49975 49975 49975 49975
## st024_MD389 st002_MD40 st025_MD417 st026_MD439 st027_MD458
## 49975 49975 49975 49975 49975
## st028_MD486 st003_MD52 st032_MD550 st033_MD575 st034_MD591
## 49975 49975 49975 49975 49975
## st005_MD64 st038_MD664 st039_MD684 st040_MD712 st041_MD734
## 49975 49975 49975 49975 49975
## st043_MD753 st044_MD778 st045_MD806 st047_MD854 st049_MD876
## 49975 49975 49975 49975 49975
## st050_MD897 st052_MD940
## 49975 49975
Let’s check out how many OTUs don’t appear in the new table:
# Number of OTUs left without any read after subsampling. The count depends on
# the random draw (8061 in the rendered output).
sum(colSums(tb18_tax_occur_ss49975) == 0, na.rm = TRUE)
## [1] 8061
There are 8061 OTUs that don’t show any occurrence in the normalized data. Let’s remove them from the table and take a look at its final dimensions:
# Drop OTUs with no reads after subsampling. A logical mask is used instead of
# -which(...): if no column summed to zero, which() would return integer(0) and
# the negative index would silently drop every column.
tb18_tax_occur_ss49975_no_cero <-
  tb18_tax_occur_ss49975[, colSums(tb18_tax_occur_ss49975) != 0, drop = FALSE]
# Reorder the rows (stations) by natural order of their names.
tb18_tax_occur_ss49975_no_cero <-
  tb18_tax_occur_ss49975_no_cero[
    mixedorder(row.names(tb18_tax_occur_ss49975_no_cero)), ,
    drop = FALSE
  ]
# 92 x 38629 in the rendered run; the OTU count depends on the random subsample.
dim(tb18_tax_occur_ss49975_no_cero)
## [1] 92 38629
Datasets summary:
dim(tb18_tax) #46690 129
## [1] 46690 129
dim(tb18_tax_occur) #46690 122
## [1] 46690 122
dim(tb18_tax_occur_ss49975_no_cero) #92 38629 (OTU count depends on the random subsample)
## [1] 92 38629
Most of the samples take Shannon Index values around 6:
Lowest number of OTUs per sample:
## [1] 1256
Maximum number of OTUs per sample:
## [1] 5182
In most of the samples, we can identify about 4000 OTUs:
# OTU richness per sample: sorted scatter and overall spread.
plot(sort(OTUs_per_sample_18S_tax_occur_ss49975), pch = 19)
boxplot(OTUs_per_sample_18S_tax_occur_ss49975, pch = 19)
# Pielou's evenness J' = H' / ln(S).
# NOTE(review): tb18_tax_occur_ss49975_div is presumably the per-sample Shannon
# index computed earlier - confirm.
pielou_evenness_18S_tax_occur_ss49975 <-
  tb18_tax_occur_ss49975_div / log(OTUs_per_sample_18S_tax_occur_ss49975)
The Pielou index (constrained between 0 and 1) takes values closer to 1 as the OTUs in a sample become more evenly abundant. Most of the samples get values between 0.7 and 0.8, meaning that reads are spread fairly evenly across the OTUs of a sample - there’s no constant presence of dominant species.
The more uneven the abundances of the species in a community (i.e. the stronger the dominance of a single species), the lower J’ is.
# Evenness per sample: sorted scatter and overall spread.
plot(sort(pielou_evenness_18S_tax_occur_ss49975), pch = 19)
boxplot(pielou_evenness_18S_tax_occur_ss49975, pch = 19)
OTU_6, with 169592 reads, is the most abundant in the overall dataset:
# Ten most abundant OTUs across all stations. TRUE is spelled out because the
# alias T can be reassigned.
head(
  sort(colSums(tb18_tax_occur_ss49975_no_cero), decreasing = TRUE),
  n = 10L
)
## OTU_6 OTU_2 OTU_23 OTU_8 OTU_25 OTU_27 OTU_1 OTU_12 OTU_7 OTU_9
## 169592 161123 56523 51607 48699 45926 40334 39512 38398 35406
Most of the OTUs show very few occurrences, suggesting that we will probably be able to identify a significant amount of rare OTUs:
# Rank-abundance curve on a log scale. TRUE is spelled out because the alias T
# can be reassigned.
plot(
  log(sort(colSums(tb18_tax_occur_ss49975_no_cero), decreasing = TRUE)),
  pch = 19
)
The OTU abundance distribution fits relatively closely to a log-normal model.
According to Preston’s lognormal model fitted to species frequency groups, we’re missing ~2600 species:
# Fit Preston's lognormal model to the pooled per-OTU read totals.
# NOTE(review): the input is tb18_tax_occur_min49975_t (the table before
# rarefaction) although the result is named *_ss49975_*; confirm which table
# is intended.
tb18_tax_occur_ss49975_prestonfit<-prestonfit(colSums(tb18_tax_occur_min49975_t))
plot(tb18_tax_occur_ss49975_prestonfit, main="Pooled species")
# Reports extrapolated, observed and veiled richness for the fitted model.
veiledspec(tb18_tax_occur_ss49975_prestonfit)
## Extrapolated Observed Veiled
## 46565.639 43966.000 2599.639
When computing Preston’s lognormal model fit without pooling data into groups, we seem to miss ~2457 species:
## Extrapolated Observed Veiled
## 46422.875 43966.000 2456.875
# Pool all stations into a single one-row community (total reads per OTU).
rarec_input <- t(as.matrix(colSums(tb18_tax_occur_ss49975_no_cero)))
# The OTU and read counts in the title are derived from the plotted table: the
# previously hard-coded figures no longer matched it. `sample` is passed by
# name so it cannot be confused with another positional argument.
tb18_rarecurve_step1000_40000 <- rarecurve(
  rarec_input,
  step = 1000,
  sample = 40000,
  xlab = "Sample size",
  ylab = "OTUs",
  label = TRUE,
  main = sprintf(
    "18S amplicons diversity step=1000 & ss=40000\n(%s OTUs; %s reads)\n",
    format(ncol(rarec_input), big.mark = ",", scientific = FALSE),
    format(sum(rarec_input), big.mark = ",", scientific = FALSE)
  )
)
# Pool the non-rarefied table into a single one-row community (total reads per
# OTU); rowSums(x) is the direct form of colSums(t(x)).
rarec_allOTUs_input <- t(as.matrix(rowSums(tb18_tax_occur)))
# The title now reports the subsample size actually passed (46690, not 100000)
# and derives the OTU and read counts from the data instead of hard-coding them.
# NOTE(review): 46690 equals the number of OTUs, not a read depth - confirm
# that this is the intended value for `sample`.
tb18_rarecurve_allOTUs_step1000_46690 <- rarecurve(
  rarec_allOTUs_input,
  step = 1000,
  sample = 46690,
  xlab = "Sample size",
  ylab = "OTUs",
  label = TRUE,
  main = sprintf(
    paste0(
      "18S amplicons diversity non-rarefied step=1000 & ss=46690\n",
      "(%s OTUs; %s reads)\n"
    ),
    format(ncol(rarec_allOTUs_input), big.mark = ",", scientific = FALSE),
    format(sum(rarec_allOTUs_input), big.mark = ",", scientific = FALSE)
  )
)
The Bray-Curtis dissimilarity, constrained between 0 (minimum distance) and 1 (highest dissimilarity), allows us to quantify the differences between samples according to the composition and relative abundance of their OTUs. In our dataset, most of the sample pairs take dissimilarity values between 0.65 and 0.75, meaning that their composition is substantially different.
The stations seem to form clusters according to geographic localization, but there are no evident clusters separated from the general groups.
(To be done: assign Longhurst provinces information to each station and check if any of the central clusters is meaningful regarding to the samples’ geographical location)
We can identify a prominent group in the central part of the NMDS plot and a few outliers in the central-high edge of the plot. The stress parameter takes a value of about 0.21, slightly above the commonly used 0.2 threshold, so the plot should be interpreted with caution.
##
## Call:
## monoMDS(dist = tb18_tax_occur_ss49975_no_cero.bray)
##
## Non-metric Multidimensional Scaling
##
## 92 points, dissimilarity 'bray', call 'vegdist(x = tb18_tax_occur_ss49975_no_cero, method = "bray")'
##
## Dimensions: 2
## Stress: 0.2113352
## Stress type 1, weak ties
## Scores scaled to unit root mean square, rotated to principal components
## Stopped after 154 iterations: Stress nearly unchanged (ratio > sratmax)
When implementing a more robust function for computing NMDS plots, the result is quite the same:
## Run 0 stress 0.2048976
## Run 1 stress 0.2130134
## Run 2 stress 0.2043258
## ... New best solution
## ... Procrustes: rmse 0.0169961 max resid 0.1571898
## Run 3 stress 0.2049018
## Run 4 stress 0.4113734
## Run 5 stress 0.2043288
## ... Procrustes: rmse 0.0004151054 max resid 0.00329998
## ... Similar to previous best
## Run 6 stress 0.2149611
## Run 7 stress 0.2049333
## Run 8 stress 0.2063113
## Run 9 stress 0.2049063
## Run 10 stress 0.2049423
## Run 11 stress 0.2055204
## Run 12 stress 0.2048942
## Run 13 stress 0.2063229
## Run 14 stress 0.2194009
## Run 15 stress 0.2063571
## Run 16 stress 0.2048972
## Run 17 stress 0.2055217
## Run 18 stress 0.2048899
## Run 19 stress 0.2134129
## Run 20 stress 0.2055332
## *** Solution reached
## Warning in ordiplot(x, choices = choices, type = type, display = display, :
## Species scores not available
## Warning in if (class(lats) == "SpatialPoints") lats <- coordinates(lats):
## the condition has length > 1 and only the first element will be used
Working datasets:
dim(tb18_tax_occur_ss49975_no_cero)
## [1] 92 38629
tb18_tax_occur_ss49975_no_cero[1:5, 1:5]
## OTU_2 OTU_106 OTU_403 OTU_139 OTU_4
## st001_MD28 131 44 0 5 53
## st002_MD40 268 8 9 5 44
## st003_MD52 736 23 41 15 34
## st005_MD64 854 103 10 0 126
## st009_MD111 2131 5 0 64 93
dim(tb18_tax_occur_ss49975_no_cero.bray)
## [1] 92 92
dim(geo_distances_MP_18S)
## [1] 92 92
Communities quickly change their composition across geographical distances:
# Distance-decay scatter: community dissimilarity against geographic distance
# for every pair of stations. The x-axis label typo ("Geopgraphical") is fixed.
plot(
  geo_distances_MP_18S,
  tb18_tax_occur_ss49975_no_cero.bray,
  pch = 19,
  cex = 0.4,
  xlab = "Geographical distances",
  ylab = "Bray-Curtis dissimilarities"
)
The Mantel statistic is significant but low, meaning that the correlation between sample dissimilarity and geographical distance is weak.
mantel(geo_distances_MP_18S, tb18_tax_occur_ss49975_no_cero.bray)
##
## Mantel statistic based on Pearson's product-moment correlation
##
## Call:
## mantel(xdis = geo_distances_MP_18S, ydis = tb18_tax_occur_ss49975_no_cero.bray)
##
## Mantel statistic r: 0.1731
## Significance: 0.001
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.0218 0.0293 0.0358 0.0425
## Permutation: free
## Number of permutations: 999
Maximum distance between samples:
## [1] 19500.19
Minimum distance between samples:
## [1] 0
Correlograms:
# Mantel correlogram with distance classes every 1000 units up to 20000.
MP_18s_ss49975_mantel_correl_by_1000km <- mantel.correlog(
  D.eco = tb18_tax_occur_ss49975_no_cero.bray,
  D.geo = geo_distances_MP_18S,
  break.pts = seq(0, 20000, by = 1000)
)
plot(MP_18s_ss49975_mantel_correl_by_1000km)
# Same correlogram at a finer resolution: classes every 100 units.
MP_18s_ss49975_mantel_correl_by_100km <- mantel.correlog(
  D.eco = tb18_tax_occur_ss49975_no_cero.bray,
  D.geo = geo_distances_MP_18S,
  break.pts = seq(0, 20000, by = 100)
)
plot(MP_18s_ss49975_mantel_correl_by_100km)
In the following plot, we can appreciate the OTUs distribution according to their percentage of occurrence and relative abundance. The red line marks OTUs that occur in more than 80% of the samples, the green line delimits regionally rare OTUs (< 0.001%), and the blue one delimits regionally abundant OTUs (> 0.1%).
Regionally abundant OTUs (relative abundance over 0.1%):
# Expose the row names as an explicit otu_names column ahead of the others.
tb18_ss49975_abundant_sorted_prov <- cbind(
  otu_names = row.names(tb18_ss49975_abundant_sorted),
  tb18_ss49975_abundant_sorted
)
## otu_names mean_rabund perc_occur SILVA_plus_MAS_plus_BM_classif
## 1 OTU_1 0.008772647 100.00000 Dinophyceae
## 2 OTU_10 0.004647541 91.30435 <NA>
## 3 OTU_101 0.002379451 86.95652 <NA>
## 4 OTU_102 0.001934880 80.43478 <NA>
## 5 OTU_10246 0.001144268 76.08696 <NA>
## 6 OTU_103 0.001510103 90.21739 <NA>
## 7 OTU_104 0.001518803 80.43478 <NA>
## 8 OTU_105 0.001420493 46.73913 <NA>
## 9 OTU_106 0.003904561 88.04348 <NA>
## 10 OTU_108 0.001519890 71.73913 <NA>
## 11 OTU_109 0.001773060 64.13043 <NA>
## 12 OTU_11 0.007058094 100.00000 <NA>
## 13 OTU_110 0.001633208 84.78261 Dinophyceae
## 14 OTU_111 0.001425278 94.56522 Dinophyceae
## 15 OTU_112 0.001392653 94.56522 Dinophyceae
## 16 OTU_113 0.001118820 76.08696 <NA>
## 17 OTU_1136 0.001289775 98.91304 Dinophyceae
## 18 OTU_11454 0.001849838 100.00000 <NA>
## 19 OTU_115 0.001266068 47.82609 <NA>
## 20 OTU_116 0.001787633 83.69565 <NA>
## 21 OTU_117 0.001023338 91.30435 <NA>
## 22 OTU_118 0.001006373 70.65217 <NA>
## 23 OTU_11833 0.001151010 84.78261 <NA>
## 24 OTU_119 0.002767906 28.26087 <NA>
## 25 OTU_12 0.008593862 100.00000 <NA>
## 26 OTU_120 0.001078365 68.47826 <NA>
## 27 OTU_121 0.001687148 98.91304 Dinophyceae
## 28 OTU_122 0.001221698 66.30435 <NA>
## 29 OTU_123 0.001025513 73.91304 <NA>
## 30 OTU_124 0.001649738 82.60870 <NA>
## 31 OTU_125 0.001232355 96.73913 <NA>
## 32 OTU_126 0.001682580 96.73913 <NA>
## 33 OTU_128 0.001146443 84.78261 <NA>
## 34 OTU_129 0.001082498 66.30435 Chrysophyceae
## 35 OTU_13 0.007238402 97.82609 <NA>
## 36 OTU_130 0.001074668 100.00000 <NA>
## 37 OTU_131 0.001257368 96.73913 Dictyochophyceae
## 38 OTU_132 0.001183853 61.95652 <NA>
## 39 OTU_133 0.001000935 57.60870 <NA>
## 40 OTU_135 0.001419623 89.13043 Chrysophyceae
## 41 OTU_136 0.001146660 52.17391 <NA>
## 42 OTU_137 0.002185441 61.95652 <NA>
## 43 OTU_14 0.003708158 92.39130 <NA>
## 44 OTU_140 0.001117950 79.34783 <NA>
## 45 OTU_141 0.001080540 66.30435 <NA>
## 46 OTU_142 0.001206255 72.82609 Prasinophyceae_clade-IX
## 47 OTU_144 0.001044218 92.39130 <NA>
## 48 OTU_145 0.001232355 86.95652 <NA>
## 49 OTU_146 0.001199078 66.30435 <NA>
## 50 OTU_14696 0.006883659 96.73913 <NA>
## 51 OTU_15 0.003846488 55.43478 <NA>
## 52 OTU_151 0.001046393 65.21739 Chrysophyceae
## 53 OTU_157 0.001019205 44.56522 <NA>
## 54 OTU_16 0.002011223 36.95652 Prasinophyceae_clade-VII
## 55 OTU_161 0.001170803 54.34783 <NA>
## 56 OTU_162 0.001259543 76.08696 <NA>
## 57 OTU_163 0.001031168 78.26087 <NA>
## 58 OTU_165 0.001167975 80.43478 <NA>
## 59 OTU_17 0.004036366 94.56522 <NA>
## 60 OTU_174 0.001095765 73.91304 <NA>
## 61 OTU_177 0.001219740 92.39130 <NA>
## 62 OTU_179 0.001121865 71.73913 <NA>
## 63 OTU_18 0.006814059 100.00000 Dinophyceae
## 64 OTU_180 0.001238880 69.56522 <NA>
## 65 OTU_1819 0.001168410 69.56522 <NA>
## 66 OTU_1842 0.001046610 95.65217 Dinophyceae
## 67 OTU_188 0.001339365 95.65217 Dinophyceae
## 68 OTU_1882 0.001395480 69.56522 <NA>
## 69 OTU_19 0.004412859 100.00000 <NA>
## 70 OTU_192 0.001192118 95.65217 <NA>
## 71 OTU_2 0.035044261 100.00000 <NA>
## 72 OTU_20 0.007376297 54.34783 Pelagophyceae
## 73 OTU_21 0.005322009 94.56522 <NA>
## 74 OTU_22 0.004715619 91.30435 <NA>
## 75 OTU_220 0.001024643 95.65217 <NA>
## 76 OTU_23 0.012293756 96.73913 <NA>
## 77 OTU_235 0.001173630 84.78261 <NA>
## 78 OTU_24 0.006683777 98.91304 <NA>
## 79 OTU_243 0.001317833 97.82609 <NA>
## 80 OTU_25 0.010592035 100.00000 Dinophyceae
## 81 OTU_26 0.005064271 96.73913 <NA>
## 82 OTU_27 0.009988907 100.00000 <NA>
## 83 OTU_28 0.004075081 95.65217 <NA>
## 84 OTU_29 0.002512126 98.91304 <NA>
## 85 OTU_3 0.005866629 85.86957 <NA>
## 86 OTU_30 0.003419971 72.82609 <NA>
## 87 OTU_31 0.003338843 95.65217 <NA>
## 88 OTU_32 0.002127803 77.17391 <NA>
## 89 OTU_33 0.003103291 98.91304 Dinophyceae
## 90 OTU_338 0.001162973 95.65217 Dinophyceae
## 91 OTU_34 0.005778106 71.73913 Chrysophyceae
## 92 OTU_35 0.002468843 86.95652 <NA>
## 93 OTU_35494 0.001208213 68.47826 <NA>
## 94 OTU_35799 0.002926463 100.00000 Dinophyceae
## 95 OTU_36 0.003820388 89.13043 <NA>
## 96 OTU_37 0.002746808 95.65217 <NA>
## 97 OTU_38 0.003358636 92.39130 <NA>
## 98 OTU_39 0.003165496 98.91304 Dinophyceae
## 99 OTU_3988 0.001437240 80.43478 <NA>
## 100 OTU_4 0.007469604 100.00000 Dinophyceae
## 101 OTU_40 0.004207104 100.00000 Dinophyceae
## 102 OTU_41 0.002774866 93.47826 <NA>
## 103 OTU_42 0.002942776 95.65217 <NA>
## 104 OTU_43 0.002659156 68.47826 <NA>
## 105 OTU_44 0.004336299 48.91304 Prasinophyceae
## 106 OTU_45 0.004710616 100.00000 Dinophyceae
## 107 OTU_46 0.002661113 28.26087 Prasinophyceae
## 108 OTU_47 0.003102203 73.91304 <NA>
## 109 OTU_48 0.002643278 93.47826 <NA>
## 110 OTU_49 0.002859908 98.91304 Dinophyceae
## 111 OTU_5 0.004774996 79.34783 <NA>
## 112 OTU_50 0.003929791 96.73913 <NA>
## 113 OTU_51 0.003430193 88.04348 <NA>
## 114 OTU_52 0.002800748 85.86957 <NA>
## 115 OTU_53 0.002048633 39.13043 <NA>
## 116 OTU_54 0.001941405 94.56522 <NA>
## 117 OTU_55 0.003479348 100.00000 <NA>
## 118 OTU_56 0.002291798 66.30435 <NA>
## 119 OTU_5618 0.003471301 96.73913 <NA>
## 120 OTU_57 0.001676273 94.56522 <NA>
## 121 OTU_58 0.003224873 73.91304 <NA>
## 122 OTU_59 0.001705635 77.17391 <NA>
## 123 OTU_6 0.036886269 100.00000 <NA>
## 124 OTU_61 0.001685625 81.52174 <NA>
## 125 OTU_62 0.002199796 83.69565 <NA>
## 126 OTU_63 0.001269983 56.52174 Prasinophyceae_clade-VII
## 127 OTU_6315 0.002216761 92.39130 <NA>
## 128 OTU_64 0.003872153 100.00000 <NA>
## 129 OTU_65 0.002889706 97.82609 <NA>
## 130 OTU_66 0.002551928 97.82609 Dinophyceae
## 131 OTU_67 0.001699328 98.91304 Dinophyceae
## 132 OTU_68 0.002422733 96.73913 <NA>
## 133 OTU_69 0.002504948 56.52174 <NA>
## 134 OTU_7 0.008351567 100.00000 <NA>
## 135 OTU_70 0.003651608 77.17391 <NA>
## 136 OTU_71 0.002629576 100.00000 <NA>
## 137 OTU_72 0.003060443 97.82609 <NA>
## 138 OTU_73 0.002135416 73.91304 <NA>
## 139 OTU_74 0.002421646 96.73913 <NA>
## 140 OTU_75 0.002597386 97.82609 <NA>
## 141 OTU_76 0.001797203 89.13043 <NA>
## 142 OTU_767 0.001081845 32.60870 <NA>
## 143 OTU_77 0.001420058 66.30435 Dinophyceae
## 144 OTU_78 0.002143246 98.91304 Dinophyceae
## 145 OTU_7889 0.002225026 81.52174 <NA>
## 146 OTU_8 0.011224525 97.82609 <NA>
## 147 OTU_80 0.002247428 70.65217 <NA>
## 148 OTU_81 0.003464558 98.91304 <NA>
## 149 OTU_82 0.002185223 91.30435 <NA>
## 150 OTU_83 0.001569480 71.73913 <NA>
## 151 OTU_84 0.002935163 73.91304 <NA>
## 152 OTU_85 0.002136721 92.39130 <NA>
## 153 OTU_87 0.001936185 92.39130 Dictyochophyceae
## 154 OTU_88 0.001396568 92.39130 <NA>
## 155 OTU_89 0.002109316 78.26087 <NA>
## 156 OTU_8924 0.001071188 72.82609 <NA>
## 157 OTU_9 0.007700807 76.08696 <NA>
## 158 OTU_90 0.001681710 97.82609 <NA>
## 159 OTU_91 0.001151228 69.56522 <NA>
## 160 OTU_92 0.001981425 73.91304 <NA>
## 161 OTU_93 0.001196250 90.21739 <NA>
## 162 OTU_94 0.001318268 94.56522 <NA>
## 163 OTU_95 0.001564695 85.86957 <NA>
## 164 OTU_96 0.002075603 66.30435 <NA>
## 165 OTU_98 0.001601888 85.86957 <NA>
## 166 OTU_99 0.001284338 70.65217 <NA>
## [1] 166 4
Proportion of regionally abundant OTUs (%):
## [1] 0.429729
Cosmopolitan OTUs (relative abundance over 0.1% and occurrence in more than 80% of samples):
# Left join on otu_names: every cosmopolitan OTU is kept, and columns from
# tb18_class_prov are attached where a match exists.
otu_tb18_ss49975_cosmop_sorted_prov <- merge(
  x = otu_tb18_ss49975_cosmop_sorted_prov,
  y = tb18_class_prov,
  by = "otu_names",
  all.x = TRUE
)
## otu_names mean_rabund perc_occur SILVA_plus_MAS_plus_BM_classif
## 1 OTU_1 0.008772647 100.00000 Dinophyceae
## 2 OTU_10 0.004647541 91.30435 <NA>
## 3 OTU_101 0.002379451 86.95652 <NA>
## 4 OTU_102 0.001934880 80.43478 <NA>
## 5 OTU_103 0.001510103 90.21739 <NA>
## 6 OTU_104 0.001518803 80.43478 <NA>
## 7 OTU_106 0.003904561 88.04348 <NA>
## 8 OTU_11 0.007058094 100.00000 <NA>
## 9 OTU_110 0.001633208 84.78261 Dinophyceae
## 10 OTU_111 0.001425278 94.56522 Dinophyceae
## 11 OTU_112 0.001392653 94.56522 Dinophyceae
## 12 OTU_1136 0.001289775 98.91304 Dinophyceae
## 13 OTU_11454 0.001849838 100.00000 <NA>
## 14 OTU_116 0.001787633 83.69565 <NA>
## 15 OTU_117 0.001023338 91.30435 <NA>
## 16 OTU_11833 0.001151010 84.78261 <NA>
## 17 OTU_12 0.008593862 100.00000 <NA>
## 18 OTU_121 0.001687148 98.91304 Dinophyceae
## 19 OTU_124 0.001649738 82.60870 <NA>
## 20 OTU_125 0.001232355 96.73913 <NA>
## 21 OTU_126 0.001682580 96.73913 <NA>
## 22 OTU_128 0.001146443 84.78261 <NA>
## 23 OTU_13 0.007238402 97.82609 <NA>
## 24 OTU_130 0.001074668 100.00000 <NA>
## 25 OTU_131 0.001257368 96.73913 Dictyochophyceae
## 26 OTU_135 0.001419623 89.13043 Chrysophyceae
## 27 OTU_14 0.003708158 92.39130 <NA>
## 28 OTU_144 0.001044218 92.39130 <NA>
## 29 OTU_145 0.001232355 86.95652 <NA>
## 30 OTU_14696 0.006883659 96.73913 <NA>
## 31 OTU_165 0.001167975 80.43478 <NA>
## 32 OTU_17 0.004036366 94.56522 <NA>
## 33 OTU_177 0.001219740 92.39130 <NA>
## 34 OTU_18 0.006814059 100.00000 Dinophyceae
## 35 OTU_1842 0.001046610 95.65217 Dinophyceae
## 36 OTU_188 0.001339365 95.65217 Dinophyceae
## 37 OTU_19 0.004412859 100.00000 <NA>
## 38 OTU_192 0.001192118 95.65217 <NA>
## 39 OTU_2 0.035044261 100.00000 <NA>
## 40 OTU_21 0.005322009 94.56522 <NA>
## 41 OTU_22 0.004715619 91.30435 <NA>
## 42 OTU_220 0.001024643 95.65217 <NA>
## 43 OTU_23 0.012293756 96.73913 <NA>
## 44 OTU_235 0.001173630 84.78261 <NA>
## 45 OTU_24 0.006683777 98.91304 <NA>
## 46 OTU_243 0.001317833 97.82609 <NA>
## 47 OTU_25 0.010592035 100.00000 Dinophyceae
## 48 OTU_26 0.005064271 96.73913 <NA>
## 49 OTU_27 0.009988907 100.00000 <NA>
## 50 OTU_28 0.004075081 95.65217 <NA>
## 51 OTU_29 0.002512126 98.91304 <NA>
## 52 OTU_3 0.005866629 85.86957 <NA>
## 53 OTU_31 0.003338843 95.65217 <NA>
## 54 OTU_33 0.003103291 98.91304 Dinophyceae
## 55 OTU_338 0.001162973 95.65217 Dinophyceae
## 56 OTU_35 0.002468843 86.95652 <NA>
## 57 OTU_35799 0.002926463 100.00000 Dinophyceae
## 58 OTU_36 0.003820388 89.13043 <NA>
## 59 OTU_37 0.002746808 95.65217 <NA>
## 60 OTU_38 0.003358636 92.39130 <NA>
## 61 OTU_39 0.003165496 98.91304 Dinophyceae
## 62 OTU_3988 0.001437240 80.43478 <NA>
## 63 OTU_4 0.007469604 100.00000 Dinophyceae
## 64 OTU_40 0.004207104 100.00000 Dinophyceae
## 65 OTU_41 0.002774866 93.47826 <NA>
## 66 OTU_42 0.002942776 95.65217 <NA>
## 67 OTU_45 0.004710616 100.00000 Dinophyceae
## 68 OTU_48 0.002643278 93.47826 <NA>
## 69 OTU_49 0.002859908 98.91304 Dinophyceae
## 70 OTU_50 0.003929791 96.73913 <NA>
## 71 OTU_51 0.003430193 88.04348 <NA>
## 72 OTU_52 0.002800748 85.86957 <NA>
## 73 OTU_54 0.001941405 94.56522 <NA>
## 74 OTU_55 0.003479348 100.00000 <NA>
## 75 OTU_5618 0.003471301 96.73913 <NA>
## 76 OTU_57 0.001676273 94.56522 <NA>
## 77 OTU_6 0.036886269 100.00000 <NA>
## 78 OTU_61 0.001685625 81.52174 <NA>
## 79 OTU_62 0.002199796 83.69565 <NA>
## 80 OTU_6315 0.002216761 92.39130 <NA>
## 81 OTU_64 0.003872153 100.00000 <NA>
## 82 OTU_65 0.002889706 97.82609 <NA>
## 83 OTU_66 0.002551928 97.82609 Dinophyceae
## 84 OTU_67 0.001699328 98.91304 Dinophyceae
## 85 OTU_68 0.002422733 96.73913 <NA>
## 86 OTU_7 0.008351567 100.00000 <NA>
## 87 OTU_71 0.002629576 100.00000 <NA>
## 88 OTU_72 0.003060443 97.82609 <NA>
## 89 OTU_74 0.002421646 96.73913 <NA>
## 90 OTU_75 0.002597386 97.82609 <NA>
## 91 OTU_76 0.001797203 89.13043 <NA>
## 92 OTU_78 0.002143246 98.91304 Dinophyceae
## 93 OTU_7889 0.002225026 81.52174 <NA>
## 94 OTU_8 0.011224525 97.82609 <NA>
## 95 OTU_81 0.003464558 98.91304 <NA>
## 96 OTU_82 0.002185223 91.30435 <NA>
## 97 OTU_85 0.002136721 92.39130 <NA>
## 98 OTU_87 0.001936185 92.39130 Dictyochophyceae
## 99 OTU_88 0.001396568 92.39130 <NA>
## 100 OTU_90 0.001681710 97.82609 <NA>
## 101 OTU_93 0.001196250 90.21739 <NA>
## 102 OTU_94 0.001318268 94.56522 <NA>
## 103 OTU_95 0.001564695 85.86957 <NA>
## 104 OTU_98 0.001601888 85.86957 <NA>
## [1] 104 4
Number and proportion (%) of cosmopolitan OTUs:
## [1] 104
## [1] 0.2692278
Number and proportion (%) of rare OTUs:
# Rare OTUs: mean relative abundance above zero but below 0.00001 (0.001%).
nrow(
  otu_tb18_ss49975_rabund_percoccur[
    otu_tb18_ss49975_rabund_percoccur$mean_rabund > 0 &
      otu_tb18_ss49975_rabund_percoccur$mean_rabund < 0.00001,
  ]
)
## [1] 32851
## [1] 85.04233
Let’s add the taxonomic classification by merging “tb18_tax_occur_ss49975_no_cero” with “tb18_tax”:
## [1] 38629 100
## Row.names st001_MD28 st002_MD40 st003_MD52 st005_MD64
## 1 OTU_1 131 132 234 149
## 2 OTU_10 4 51 167 77
## 3 OTU_100 90 207 24 47
## 4 OTU_1000 1 0 8 0
## 5 OTU_10000 2 4 5 0
## [1] 38629 99
## st001_MD28 st002_MD40 st003_MD52 st005_MD64 st009_MD111
## OTU_1 131 132 234 149 240
## OTU_10 4 51 167 77 94
## OTU_100 90 207 24 47 340
## OTU_1000 1 0 8 0 0
## OTU_10000 2 4 5 0 0
## [1] 38629 100
## st001_MD28 st002_MD40 st003_MD52 st005_MD64 st009_MD111
## OTU_1 131 132 234 149 240
## OTU_2 131 268 736 854 2131
## OTU_3 334 1499 1574 433 286
## OTU_4 53 44 34 126 93
## OTU_5 0 109 276 41 98
## [1] 12040 100
# Create a table per group and count in how many samples they occur.
# Fail early if the classification column is missing: `$` then returns NULL,
# the comparison yields logical(0) and the group table silently ends up empty
# (the rendered output shows a 0-row table for this group).
stopifnot(!is.null(tb18_phototrophs$classif))
Dinophyceae_tb <-
  tb18_phototrophs[which(tb18_phototrophs$classif == "Dinophyceae"), ]
# Preview with head(): indexing rows 1:5 of a shorter table prints NA rows.
head(Dinophyceae_tb[, 1:5], n = 5L)
## st001_MD28 st002_MD40 st003_MD52 st005_MD64 st009_MD111
## NA NA NA NA NA NA
## NA.1 NA NA NA NA NA
## NA.2 NA NA NA NA NA
## NA.3 NA NA NA NA NA
## NA.4 NA NA NA NA NA
# Keep the first 92 columns (presumably the per-station counts - confirm).
Dinophyceae_tb_occur <- Dinophyceae_tb[, 1:92]
# Preview with head(): indexing rows 1:5 of a shorter table prints NA rows.
head(Dinophyceae_tb_occur[, 1:5], n = 5L)
## st001_MD28 st002_MD40 st003_MD52 st005_MD64 st009_MD111
## NA NA NA NA NA NA
## NA.1 NA NA NA NA NA
## NA.2 NA NA NA NA NA
## NA.3 NA NA NA NA NA
## NA.4 NA NA NA NA NA
dim(Dinophyceae_tb_occur)
## [1] 0 92
# Number of samples with at least one read of this group. sum() over the mask
# replaces length(df[, mask]): when exactly one column matches, the subset
# drops to a vector and length() would return the number of OTUs instead.
sum(colSums(Dinophyceae_tb_occur) > 0)
## [1] 0
#Dinophyceae_tb_samples <- Dinophyceae_tb_occur[,colSums(Dinophyceae_tb_occur) > 0]
#length(Dinophyceae_tb_samples[which(colSums(Dinophyceae_tb_occur) != 0)])
# Samples in which "other_Prasinophyceae" OTUs occur.
# NOTE(review): the per-class summary also lists a plain "Prasinophyceae"
# class that has no table in this section -- confirm this is intended.
# sum() over the mask avoids length(df[, mask]) returning the row count when
# exactly one column is selected.
Prasinophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "other_Prasinophyceae"), ]
Prasinophyceae_tb_occur <- Prasinophyceae_tb[, 1:92]
sum(colSums(Prasinophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Chrysophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Chrysophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Chrysophyceae"), ]
Chrysophyceae_tb_occur <- Chrysophyceae_tb[, 1:92]
sum(colSums(Chrysophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Pelagophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Pelagophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Pelagophyceae"), ]
Pelagophyceae_tb_occur <- Pelagophyceae_tb[, 1:92]
sum(colSums(Pelagophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Dictyochophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Dictyochophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Dictyochophyceae"), ]
Dictyochophyceae_tb_occur <- Dictyochophyceae_tb[, 1:92]
sum(colSums(Dictyochophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Cryptophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Cryptomonadales_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Cryptophyceae"), ]
Cryptomonadales_tb_occur <- Cryptomonadales_tb[, 1:92]
sum(colSums(Cryptomonadales_tb_occur) > 0)
## [1] 0
# Samples in which Bacillariophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Bacillariophyta_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Bacillariophyceae"), ]
Bacillariophyta_tb_occur <- Bacillariophyta_tb[, 1:92]
sum(colSums(Bacillariophyta_tb_occur) > 0)
## [1] 0
# Samples in which Chlorarachniophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Chlorarachniophyta_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Chlorarachniophyceae"), ]
Chlorarachniophyta_tb_occur <- Chlorarachniophyta_tb[, 1:92]
sum(colSums(Chlorarachniophyta_tb_occur) > 0)
## [1] 0
# Samples in which Bolidophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Bolidophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Bolidophyceae"), ]
Bolidophyceae_tb_occur <- Bolidophyceae_tb[, 1:92]
sum(colSums(Bolidophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Pinguiophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Pinguiochysidales_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Pinguiophyceae"), ]
Pinguiochysidales_tb_occur <- Pinguiochysidales_tb[, 1:92]
sum(colSums(Pinguiochysidales_tb_occur) > 0)
## [1] 0
# Samples in which Prymnesiophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Prymnesiophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Prymnesiophyceae"), ]
Prymnesiophyceae_tb_occur <- Prymnesiophyceae_tb[, 1:92]
sum(colSums(Prymnesiophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Mamiellophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Mamiellophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Mamiellophyceae"), ]
Mamiellophyceae_tb_occur <- Mamiellophyceae_tb[, 1:92]
sum(colSums(Mamiellophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Eustigmatales OTUs occur.
# The filter uses "Eustigmatales" because that is the label shown in the
# per-class read/OTU summaries; the previous "Eustigmatophyceae" string could
# never match it.
# NOTE(review): confirm against unique(tb18_phototrophs$classif).
# sum() over the mask avoids length(df[, mask]) returning the row count when
# exactly one column is selected.
Eustigmatales_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Eustigmatales"), ]
Eustigmatales_tb_occur <- Eustigmatales_tb[, 1:92]
sum(colSums(Eustigmatales_tb_occur) > 0)
## [1] 0
# Samples in which Chlorophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Chlorophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Chlorophyceae"), ]
Chlorophyceae_tb_occur <- Chlorophyceae_tb[, 1:92]
sum(colSums(Chlorophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Ulvophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Ulvophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Ulvophyceae"), ]
Ulvophyceae_tb_occur <- Ulvophyceae_tb[, 1:92]
sum(colSums(Ulvophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Raphydophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Raphydophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Raphydophyceae"), ]
Raphydophyceae_tb_occur <- Raphydophyceae_tb[, 1:92]
sum(colSums(Raphydophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Trebouxiophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Trebouxiophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Trebouxiophyceae"), ]
Trebouxiophyceae_tb_occur <- Trebouxiophyceae_tb[, 1:92]
sum(colSums(Trebouxiophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Phaeophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Phaeophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Phaeophyceae"), ]
Phaeophyceae_tb_occur <- Phaeophyceae_tb[, 1:92]
sum(colSums(Phaeophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Phaeothamniophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Phaeothamniophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Phaeothamniophyceae"), ]
Phaeothamniophyceae_tb_occur <- Phaeothamniophyceae_tb[, 1:92]
sum(colSums(Phaeothamniophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Xanthophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Xanthophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Xanthophyceae"), ]
Xanthophyceae_tb_occur <- Xanthophyceae_tb[, 1:92]
sum(colSums(Xanthophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Chlorodendrophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Chlorodendrophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Chlorodendrophyceae"), ]
Chlorodendrophyceae_tb_occur <- Chlorodendrophyceae_tb[, 1:92]
sum(colSums(Chlorodendrophyceae_tb_occur) > 0)
## [1] 0
# Samples in which IncertaeSedis_Archaeplastida OTUs occur. sum() over the
# mask avoids length(df[, mask]) returning the row count when one column is
# selected.
IncertaeSedis_Archaeplastida_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "IncertaeSedis_Archaeplastida"), ]
IncertaeSedis_Archaeplastida_tb_occur <- IncertaeSedis_Archaeplastida_tb[, 1:92]
sum(colSums(IncertaeSedis_Archaeplastida_tb_occur) > 0)
## [1] 0
# Samples in which Nephroselmidophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Nephroselmidophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Nephroselmidophyceae"), ]
Nephroselmidophyceae_tb_occur <- Nephroselmidophyceae_tb[, 1:92]
sum(colSums(Nephroselmidophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Pavlovophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Pavlovophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Pavlovophyceae"), ]
Pavlovophyceae_tb_occur <- Pavlovophyceae_tb[, 1:92]
sum(colSums(Pavlovophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Rhodophyceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Rhodophyceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Rhodophyceae"), ]
Rhodophyceae_tb_occur <- Rhodophyceae_tb[, 1:92]
sum(colSums(Rhodophyceae_tb_occur) > 0)
## [1] 0
# Samples in which Rappemonads OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Rappemonads_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Rappemonads"), ]
Rappemonads_tb_occur <- Rappemonads_tb[, 1:92]
sum(colSums(Rappemonads_tb_occur) > 0)
## [1] 0
# Samples in which MOCH-1 OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
MOCH_1_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "MOCH-1"), ]
MOCH_1_tb_occur <- MOCH_1_tb[, 1:92]
sum(colSums(MOCH_1_tb_occur) > 0)
## [1] 0
# Samples in which MOCH-2 OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
MOCH_2_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "MOCH-2"), ]
MOCH_2_tb_occur <- MOCH_2_tb[, 1:92]
sum(colSums(MOCH_2_tb_occur) > 0)
## [1] 0
# Samples in which MOCH-5 OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
MOCH_5_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "MOCH-5"), ]
MOCH_5_tb_occur <- MOCH_5_tb[, 1:92]
sum(colSums(MOCH_5_tb_occur) > 0)
## [1] 0
# Samples in which Prasinophyceae_clade-VII OTUs occur. sum() over the mask
# avoids length(df[, mask]) returning the row count when one column is
# selected.
Prasinophyceae_clade_VII_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Prasinophyceae_clade-VII"), ]
Prasinophyceae_clade_VII_tb_occur <- Prasinophyceae_clade_VII_tb[, 1:92]
sum(colSums(Prasinophyceae_clade_VII_tb_occur) > 0)
## [1] 0
# Samples in which Prasinophyceae_clade-IX OTUs occur. sum() over the mask
# avoids length(df[, mask]) returning the row count when one column is
# selected.
Prasinophyceae_clade_IX_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Prasinophyceae_clade-IX"), ]
Prasinophyceae_clade_IX_tb_occur <- Prasinophyceae_clade_IX_tb[, 1:92]
sum(colSums(Prasinophyceae_clade_IX_tb_occur) > 0)
## [1] 0
# Samples in which Pyramimonadaceae OTUs occur. sum() over the mask avoids
# length(df[, mask]) returning the row count when one column is selected.
Pyramimonadaceae_tb <- tb18_phototrophs[which(tb18_phototrophs$classif == "Pyramimonadaceae"), ]
Pyramimonadaceae_tb_occur <- Pyramimonadaceae_tb[, 1:92]
sum(colSums(Pyramimonadaceae_tb_occur) > 0)
## [1] 0
## reads_per_class OTUs_per_class
## Bacillariophyceae 3681 103
## Bolidophyceae 2942 15
## Chlorarachniophyceae 3490 70
## Chrysophyceae 80489 367
## Cryptophyceae 9760 75
## reads_per_class OTUs_per_class samples_per_class
## Dinophyceae 666342 10065 0
## Chrysophyceae 80489 367 0
## Prasinophyceae 51443 194 0
## Pelagophyceae 46762 329 0
## Dictyochophyceae 39651 239 0
## Prasinophyceae_clade-VII 24046 126 0
## MOCH-2 17211 136 0
## Prasinophyceae_clade-IX 12147 81 0
## Cryptophyceae 9760 75 0
## MOCH-1 4429 72 0
## Bacillariophyceae 3681 103 0
## Chlorarachniophyceae 3490 70 0
## Bolidophyceae 2942 15 0
## MOCH-5 2473 22 0
## Pinguiophyceae 1647 23 0
## other_Prasinophyceae 409 7 0
## Prymnesiophyceae 371 58 0
## Mamiellophyceae 337 35 0
## Eustigmatales 237 9 0
## Raphydophyceae 209 4 0
## Trebouxiophyceae 100 3 0
## Pyramimonadaceae 97 3 0
## Ulvophyceae 16 3 0
## reads_per_class OTUs_per_class samples_per_class
## 100 100 0
## reads_per_class OTUs_per_class samples_per_class
## Dinophyceae 68.816438068 83.60328931 0
## Chrysophyceae 8.312497612 3.04842595 0
## Prasinophyceae 5.312773356 1.61142952 0
## Pelagophyceae 4.829343306 2.73278512 0
## Dictyochophyceae 4.094955122 1.98521472 0
## Prasinophyceae_clade-VII 2.483349496 1.04659855 0
## MOCH-2 1.777465199 1.12966193 0
## Prasinophyceae_clade-IX 1.254480842 0.67281336 0
## Cryptophyceae 1.007963532 0.62297533 0
## MOCH-1 0.457404762 0.59805632 0
## Bacillariophyceae 0.380155098 0.85555279 0
## Chlorarachniophyceae 0.360429582 0.58144364 0
## Bolidophyceae 0.303834909 0.12459507 0
## MOCH-5 0.255398956 0.18273943 0
## Pinguiophyceae 0.170093846 0.19104577 0
## other_Prasinophyceae 0.042239455 0.05814436 0
## Prymnesiophyceae 0.038315007 0.48176759 0
## Mamiellophyceae 0.034803659 0.29072182 0
## Eustigmatales 0.024476164 0.07475704 0
## Raphydophyceae 0.021584465 0.03322535 0
## Trebouxiophyceae 0.010327495 0.02491901 0
## Pyramimonadaceae 0.010017670 0.02491901 0
## Ulvophyceae 0.001652399 0.02491901 0
Reads per class vs. OTUs per class:
Reads per class vs. samples in which they occur:
## [1] 43966 100
## Row.names st055_MD1013 st057_MD1042 st058_MD1080 st059_MD1100
## 1 OTU_1 579 5968 2609 1500
## 2 OTU_10 26 5 1 26
## 3 OTU_100 31 159 21 8
## 4 OTU_1000 0 95 26 21
## 5 OTU_10000 0 0 0 0
## [1] 43966 99
## st055_MD1013 st057_MD1042 st058_MD1080 st059_MD1100 st009_MD111
## OTU_1 579 5968 2609 1500 815
## OTU_10 26 5 1 26 309
## OTU_100 31 159 21 8 1340
## OTU_1000 0 95 26 21 0
## OTU_10000 0 0 0 0 0
## [1] 43966 100
## st055_MD1013 st057_MD1042 st058_MD1080 st059_MD1100 st009_MD111
## OTU_1 579 5968 2609 1500 815
## OTU_2 9289 16285 5140 1187 8207
## OTU_3 15 96 18 6 1061
## OTU_4 230 776 150 561 337
## OTU_5 68 513 287 604 396
## [1] 13720 100
## st055_MD1013 st057_MD1042 st058_MD1080 st059_MD1100 st009_MD111
## OTU_1 579 5968 2609 1500 815
## OTU_4 230 776 150 561 337
## OTU_18 1042 4241 5755 3019 1129
## OTU_25 1353 6434 4523 4370 3060
## OTU_33 185 745 1559 258 615
## st055_MD1013 st057_MD1042 st058_MD1080 st059_MD1100 st009_MD111
## OTU_1 579 5968 2609 1500 815
## OTU_4 230 776 150 561 337
## OTU_18 1042 4241 5755 3019 1129
## OTU_25 1353 6434 4523 4370 3060
## OTU_33 185 745 1559 258 615
## [1] 11479 92
## [1] 92
## [1] 92
## [1] 92
## [1] 92
## [1] 91
## [1] 92
## [1] 0
## [1] 0
## [1] 0
## [1] 90
## [1] 0
## [1] 81
## [1] 49
## [1] 31
## [1] 0
## [1] 5
## [1] 62
## [1] 4
## Group.1 x
## 7 Dinophyceae 1807655
## 4 Chrysophyceae 195117
## 13 Pelagophyceae 121123
## 15 Prasinophyceae 111654
## 6 Dictyochophyceae 104798
## 17 Prasinophyceae_clade-VII 56618
## 10 MOCH-2 46120
## 16 Prasinophyceae_clade-IX 30109
## 5 Cryptophyceae 27275
## 9 MOCH-1 13884
## 3 Chlorarachniophyceae 10322
## 1 Bacillariophyceae 9342
## 2 Bolidophyceae 7241
## 11 MOCH-5 7165
## 14 Pinguiophyceae 5345
## 18 Prymnesiophyceae 1074
## 23 other_Prasinophyceae 1063
## 12 Mamiellophyceae 935
## 20 Raphydophyceae 664
## 8 Eustigmatales 573
## 19 Pyramimonadaceae 254
## 21 Trebouxiophyceae 193
## 22 Ulvophyceae 50
## Group.1 x
## 7 Dinophyceae 11479
## 4 Chrysophyceae 418
## 13 Pelagophyceae 386
## 6 Dictyochophyceae 265
## 15 Prasinophyceae 218
## 10 MOCH-2 155
## 17 Prasinophyceae_clade-VII 133
## 1 Bacillariophyceae 111
## 5 Cryptophyceae 95
## 16 Prasinophyceae_clade-IX 86
## 9 MOCH-1 83
## 3 Chlorarachniophyceae 80
## 18 Prymnesiophyceae 74
## 12 Mamiellophyceae 36
## 14 Pinguiophyceae 27
## 11 MOCH-5 25
## 2 Bolidophyceae 16
## 23 other_Prasinophyceae 11
## 8 Eustigmatales 9
## 20 Raphydophyceae 4
## 19 Pyramimonadaceae 3
## 21 Trebouxiophyceae 3
## 22 Ulvophyceae 3
## reads_per_class OTUs_per_class
## Bacillariophyceae 9342 111
## Bolidophyceae 7241 16
## Chlorarachniophyceae 10322 80
## Chrysophyceae 195117 418
## Cryptophyceae 27275 95
## reads_per_class OTUs_per_class samples_per_class
## Dinophyceae 1807655 11479 92
## Chrysophyceae 195117 418 92
## Pelagophyceae 121123 386 92
## Prasinophyceae 111654 218 113
## Dictyochophyceae 104798 265 116
## Prasinophyceae_clade-VII 56618 133 103
## MOCH-2 46120 155 112
## Prasinophyceae_clade-IX 30109 86 112
## Cryptophyceae 27275 95 108
## MOCH-1 13884 83 111
## Chlorarachniophyceae 10322 80 97
## Bacillariophyceae 9342 111 65
## Bolidophyceae 7241 16 35
## MOCH-5 7165 25 75
## Pinguiophyceae 5345 27 75
## Prymnesiophyceae 1074 74 50
## other_Prasinophyceae 1063 11 18
## Mamiellophyceae 935 36 1
## Raphydophyceae 664 4 1
## Eustigmatales 573 9 1
## Pyramimonadaceae 254 3 1
## Trebouxiophyceae 193 3 1
## Ulvophyceae 50 3 1
## reads_per_class OTUs_per_class samples_per_class
## 100 100 1600
## reads_per_class OTUs_per_class samples_per_class
## Dinophyceae 70.650878184 83.66618076 100.000000
## Chrysophyceae 7.626005736 3.04664723 100.000000
## Pelagophyceae 4.734004176 2.81341108 100.000000
## Prasinophyceae 4.363915212 1.58892128 122.826087
## Dictyochophyceae 4.095953449 1.93148688 126.086957
## Prasinophyceae_clade-VII 2.212873265 0.96938776 111.956522
## MOCH-2 1.802566586 1.12973761 121.739130
## Prasinophyceae_clade-IX 1.176788320 0.62682216 121.739130
## Cryptophyceae 1.066023496 0.69241983 117.391304
## MOCH-1 0.542646021 0.60495627 120.652174
## Chlorarachniophyceae 0.403427847 0.58309038 105.434783
## Bacillariophyceae 0.365125261 0.80903790 70.652174
## Bolidophyceae 0.283009207 0.11661808 38.043478
## MOCH-5 0.280038803 0.18221574 81.521739
## Pinguiophyceae 0.208905429 0.19679300 81.521739
## Prymnesiophyceae 0.041976507 0.53935860 54.347826
## other_Prasinophyceae 0.041546580 0.08017493 19.565217
## Mamiellophyceae 0.036543794 0.26239067 1.086957
## Raphydophyceae 0.025951956 0.02915452 1.086957
## Eustigmatales 0.022395287 0.06559767 1.086957
## Pyramimonadaceae 0.009927405 0.02186589 1.086957
## Trebouxiophyceae 0.007543264 0.02186589 1.086957
## Ulvophyceae 0.001954214 0.02186589 1.086957
Reads per class vs. OTUs per class:
Reads per class vs. samples in which they occur: